## FID_1 X Y ID
## Min. : 290 Min. :195331 Min. : 93704 Length:2541
## 1st Qu.: 8164 1st Qu.:217492 1st Qu.:127264 Class :character
## Median :13670 Median :224149 Median :138824 Mode :character
## Mean :12669 Mean :229780 Mean :137604
## 3rd Qu.:16992 3rd Qu.:240142 3rd Qu.:147500
## Max. :22990 Max. :291500 Max. :163250
##
## Pb Zn Cu SIGLE_PEDO
## Min. : 8.127 Min. : 5.80 Min. : 2.016 Length:2541
## 1st Qu.: 26.135 1st Qu.: 85.12 1st Qu.: 12.905 Class :character
## Median : 32.000 Median : 112.53 Median : 15.485 Mode :character
## Mean : 52.612 Mean : 270.92 Mean : 22.576
## 3rd Qu.: 48.916 3rd Qu.: 182.60 3rd Qu.: 20.000
## Max. :1149.970 Max. :25364.31 Max. :2234.267
## NA's :1038 NA's :157 NA's :273
## region_etm GS
## Min. : 2.000 Min. : 3.00
## 1st Qu.: 6.000 1st Qu.: 9.00
## Median : 7.000 Median : 9.00
## Mean : 6.749 Mean :12.57
## 3rd Qu.: 8.000 3rd Qu.:16.00
## Max. :17.000 Max. :43.00
##
## FID_1 X Y ID Pb Zn Cu SIGLE_PEDO
## 1: 409 238401 153664 02/LA00951 32.14414 10.97368 13.85714 Aba1
## 2: 654 237838 152974 03/LA01232 18.59821 NA 12.00000 Aba1
## 3: 511 224562 137188 02/LA04725 32.14414 135.18421 14.80952 Aba1
## 4: 473 214050 132070 02/LA04323 40.25225 57.55263 12.90476 Aca0
## 5: 544 214043 133536 02/LA05640 NA 175.97368 10.04762 Ada0
## 6: 512 221460 135511 02/LA04727 45.65766 252.28947 15.76190 Ada1
## region_etm GS
## 1: 6 9
## 2: 6 9
## 3: 6 9
## 4: 6 9
## 5: 6 9
## 6: 6 9
# Create grid (using a margin to make sure you englobe the whole province)
gridsize = 1000
margin = 5000
x <- seq(floor(min(mydata$X)-margin), # from minimum longitude
ceiling(max(mydata$X+margin)), # to maximum longitude
by=gridsize)
y <- seq(floor(min(mydata$Y)-margin), # from minimum latitude
ceiling(max(mydata$Y)+margin), # to maximum latitude
by=gridsize)
grid <- as.data.table(expand.grid(X=x, Y=y))
# Create a spatial version of your grid
mydataSpatial <- copy(grid)
coordinates(mydataSpatial) <- ~X+Y
proj4string(mydataSpatial) <- CRS("+init=epsg:31370") # Specify coordinate system (Lambert belge 1972)# Load provinces shapefile and specify it's coordinate system
prov <- readOGR('BEL_ADM2.shp', use_iconv = TRUE, encoding = "UTF-8")## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## Warning: OGR support is provided by the sf and terra packages among others
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\nicd\Desktop\Portfolio\templates\project\mod_spatiale\BEL_ADM2.shp", layer: "BELGIUM_-_Provinces"
## with 11 features
## It has 8 fields
## [1] "Antwerpen" "Bruxelles" "Vlaams Brabant" "Brabant Wallon"
## [5] "West-Vlaanderen" "Oost-Vlaanderen" "Hainaut" "Liège"
## [9] "Limburg" "Luxembourg" "Namur"
# Transform grid coordinates system to match provinces
mydataSpatial <- spTransform(mydataSpatial, CRS("+init=epsg:31370"))## Warning: PROJ support is provided by the sf and terra packages among others
# Get index of points in the province of Liège
prov.id <- over(prov[prov$NAME_2 == "Liège", ],
mydataSpatial,
returnList = TRUE)
# Select rows in your original grid that are located in Liège
prov.grid = grid[prov.id[[1]],]# Display selected grid points in red and data location
ggplot()+
geom_point(data=prov.grid, aes(x=X, y=Y), color='black', shape = 3)+
geom_point(data=grid, aes(x=X, y=Y), color='black', shape = 3) +
geom_point(data=prov.grid, aes(x=X, y=Y), color='red', shape = 3)+
geom_point(data=mydata,aes(x=X,y=Y,shape="Point de mesure"),color="black")+
ggtitle("Limite de la province étudiée (Liège) et positions des points de mesures")+
xlab("Longitude (Lambert belge 1972)")+
ylab("Latitude (Lambert belge 1972)")+
scale_shape_manual("",values=15)Pb <- na.omit(subset(mydata,select=c("X","Y","Pb")))
ggplot() +
geom_point(data=Pb, aes(x=X, y=Y, color=Pb), size=2) +
scale_color_gradient(low='blue',high ='yellow')+
xlab("Longitude") +
ylab("Latitude") +
ggtitle("Sample Pb")Zn <- na.omit(subset(mydata,select=c("X","Y","Zn")))
#Plot
ggplot() +
geom_point(data=Zn, aes(x=X, y=Y, color=Zn), size=2) +
scale_color_gradient(low='blue',high ='yellow')+
xlab("Longitude") +
ylab("Latitude") +
ggtitle("Sample Zn")Cu <- na.omit(subset(mydata,select=c("X","Y","Cu")))
#Plot
ggplot() +
geom_point(data=Cu, aes(x=X, y=Y, color=Cu), size=2) +
scale_color_gradient(low='blue',high ='yellow')+
xlab("Longitude") +
ylab("Latitude") +
ggtitle("Sample Cu")| Pb | Zn | Cu |
|---|---|---|
| Min. : 8.127 | Min. : 5.80 | Min. : 2.016 |
| 1st Qu.: 26.135 | 1st Qu.: 85.12 | 1st Qu.: 12.905 |
| Median : 32.000 | Median : 112.53 | Median : 15.485 |
| Mean : 52.612 | Mean : 270.92 | Mean : 22.576 |
| 3rd Qu.: 48.916 | 3rd Qu.: 182.60 | 3rd Qu.: 20.000 |
| Max. :1149.970 | Max. :25364.31 | Max. :2234.267 |
| NA’s :1038 | NA’s :157 | NA’s :273 |
ggplot(Pb, aes(x = Pb)) +
geom_histogram(aes(y = ..density..,
fill = "Density histogram" ),
bins = 15,
color = "white") +
geom_density(col = "lightsteelblue3",
aes(fill = "Fitted pdf"),
alpha = 0.2) +
xlab("Pb") +
ylab("f(x)") +
ggtitle("Density histogram of the plomb") +
stat_function(fun=dnorm,
args=list(mean = mean(Pb$Pb), sd = sd(Pb$Pb)),
aes(color="Normal pdf"),
size= 1) +
scale_fill_manual("", values = c("Density histogram"="lightsteelblue2",
"Fitted pdf"=alpha("lightsteelblue3",.2))) +
scale_color_manual("", values="red")## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggplot(Zn, aes(x = Zn)) +
geom_histogram(aes(y = ..density..,
fill = "Density histogram" ),
bins = 15,
color = "white") +
geom_density(col = "lightsteelblue3",
aes(fill = "Fitted pdf"),
alpha = 0.2) +
xlab("Zn") +
ylab("f(x)") +
ggtitle("Density histogram of the Zinc") +
stat_function(fun=dnorm,
args=list(mean = mean(Zn$Zn), sd = sd(Zn$Zn)),
aes(color="Normal pdf"),
size= 1) + # width of the line
scale_fill_manual("", values = c("Density histogram"="lightsteelblue2",
"Fitted pdf"=alpha("lightsteelblue3",.2))) +
scale_color_manual("", values="red")ggplot(Cu, aes(x = Cu)) +
geom_histogram(aes(y = ..density..,
fill = "Density histogram" ),
bins = 15, # number of bins
color = "white") +
geom_density(col = "lightsteelblue3",
aes(fill = "Fitted pdf"),
alpha = 0.2) +
xlab("Cu") +
ylab("f(x)") +
ggtitle("Density histogram of the Cuivre") +
stat_function(fun=dnorm,
args=list(mean = mean(Cu$Cu), sd = sd(Cu$Cu)),
aes(color="Normal pdf"),
size= 1) + # width of the line
scale_fill_manual("", values = c("Density histogram"="lightsteelblue2",
"Fitted pdf"=alpha("lightsteelblue3",.2))) +
scale_color_manual("", values="red")Pbbx.lm <- lm(Pbbx~X+Y,data=Pb)
scatter3d(x=Pb$X,z=Pb$Y,y=Pb$Pbbx,
xlab = 'Longitude', zlab = 'Lattitude', ylab = 'Pbbx')## Le chargement a nécessité le package : mgcv
Non stationnaire d’ordre 1 + on voit des outliers
# Suppression tendance
Pb$Pbbx_res <- Pb$Pbbx-(predict(Pbbx.lm,Pb))
scatter3d(x=Pb$X,z=Pb$Y,y=Pb$Pbbx_res,
xlab='Longitude',zlab='Latitude',ylab='Pbbx_res')
rglwidget(width = 300,height = 300)
# Détection potentiel Outlier
res.Sd <- sd(Pb$Pbbx_res)
res.Mean <- mean(Pb$Pbbx_res)
out <- nrow(Pb) - nrow(Pb[Pbbx_res %between% c(res.Mean-3*res.Sd,res.Mean+3*res.Sd),])
cat("nombre d'outlier potentiel =", out)## nombre d'outlier potentiel = 15
Pb.gstat <- gstat(formula=Pbbx_res~1, data=Pb,locations = ~X+Y)
Pb.vario <- variogram(Pb.gstat,cutoff=40000,width=3000)
vg.exp <- vgm(model = 'Sph',nugget = 2e-4)
fit.Pb <- fit.variogram(Pb.vario, model = vg.exp,fit.method = 6)
plot(Pb.vario,model=fit.Pb, main="Variogram résidus du Plomb Bx",pch=16,col='black')
trellis.focus('panel',1,1)
llines(x=c(0,max(Pb.vario$dist)),y=c(var(Pb$Pbbx_res)),col = 'red',lty=2)## NULL
| model | psill | range | kappa | ang1 | ang2 | ang3 | anis1 | anis2 |
|---|---|---|---|---|---|---|---|---|
| Nug | 0.0001076 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
| Sph | 0.0004151 | 20021 | 0.5 | 0 | 0 | 0 | 1 | 1 |
Znbx.lm <- lm(Znbx~X+Y,data=Zn)
scatter3d(x=Zn$X,z=Zn$Y,y=Zn$Znbx,
xlab = 'Longitude', zlab = 'Lattitude', ylab = 'Znbx')
rglwidget(width = 300, height = 300)
Non stationnaire d’ordre 1 + on voit des outliers
# Suppressions tendance
Zn$Znbx_res <- Zn$Znbx-(predict(Znbx.lm,Zn))
scatter3d(x=Zn$X,z=Zn$Y,y=Zn$Znbx_res,
xlab='Longitude',zlab='Latitude',ylab='Znbx_res')
rglwidget(width = 300,height = 300)
# Détection potentiel Outlier
res.Sd <- sd(Zn$Znbx_res)
res.Mean <- mean(Zn$Znbx_res)
out <- nrow(Zn) - nrow(Zn[Znbx_res %between% c(res.Mean-3*res.Sd,res.Mean+3*res.Sd),])
cat("nombre d'outlier potentiel =", out)## nombre d'outlier potentiel = 47
Zn.gstat <- gstat(formula=Znbx_res~1, data=Zn,locations = ~X+Y)
Zn.vario <- variogram(Zn.gstat,cutoff=40000,width=3000)
vg.exp <- vgm(model = 'Sph',nugget = 0.002)
fit.Zn <- fit.variogram(Zn.vario, model = vg.exp,fit.method = 6)
plot(Zn.vario,model=fit.Zn ,main="Variogram résidus du Zinc",pch=16,col='black')
trellis.focus('panel',1,1)
llines(x=c(0,max(Zn.vario$dist)),y=c(var(Zn$Znbx_res)),col = 'red',lty = 2)## NULL
| model | psill | range | kappa | ang1 | ang2 | ang3 | anis1 | anis2 |
|---|---|---|---|---|---|---|---|---|
| Nug | 0.001452 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
| Sph | 0.007236 | 17386 | 0.5 | 0 | 0 | 0 | 1 | 1 |
Cubx.lm <- lm(Cubx~X+Y,data=Cu)
scatter3d(x=Cu$X,z=Cu$Y,y=Cu$Cubx,
xlab = 'Longitude', zlab = 'Lattitude', ylab = 'Cubx')
rglwidget(width = 300, height = 300)
Non stationnaire d’ordre 1 + on voit des outliers
# Suppressions tendance
Cu$Cubx_res <- Cu$Cubx-(predict(Cubx.lm,Cu))
scatter3d(x=Cu$X,z=Cu$Y,y=Cu$Cubx_res,
xlab='Longitude',zlab='Latitude',ylab='Cubx')
rglwidget(width = 300,height = 300)
# Détection potentiel Outlier
res.Sd <- sd(Cu$Cubx_res)
res.Mean <- mean(Cu$Cubx_res)
out <- nrow(Cu) - nrow(Cu[Cubx_res %between% c(res.Mean-3*res.Sd,res.Mean+3*res.Sd),])
cat("nombre d'outlier potentiel =", out)## nombre d'outlier potentiel = 34
Cu.gstat <- gstat(formula=Cubx_res~1, data=Cu,locations = ~X+Y)
Cu.vario <- variogram(Cu.gstat,cutoff=40000,width=3000)
vg.exp <- vgm(model = 'Sph',nugget = 0.01)
fit.Cu <- fit.variogram(Cu.vario, model = vg.exp,fit.method = 6)
plot(Cu.vario,model=fit.Cu, main="Variogram résidus du Cuivre",pch=16,col='black')
trellis.focus('panel',1,1)
llines(x=c(0,max(Cu.vario$dist)),y=c(var(Cu$Cubx_res)),col = 'red',lty=2)## NULL
| model | psill | range | kappa | ang1 | ang2 | ang3 | anis1 | anis2 |
|---|---|---|---|---|---|---|---|---|
| Nug | 0.01108 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
| Sph | 0.01495 | 15179 | 0.5 | 0 | 0 | 0 | 1 | 1 |
Demande normalité, stationnarité d’ordre 1 et 2
plot.predictions <- function(pred,data, plot.title,legend){
ggplot() +
geom_tile(data=pred,
aes(x = X, y = Y, fill = predi)) +
geom_point(data=data,
aes(x=X, y=Y, color="Measurement points"),
shape=18,
size=2) +
scale_color_manual("", values="black") +
scale_fill_gradientn(name=legend, colors=c('royalblue','green3', 'yellow', 'red')) +
theme(legend.key = element_rect(fill = "green3",
color = NA)) +
xlab("Longitude") +
ylab("Latitude") +
ggtitle(plot.title)
}powers <- seq(0.5,5,0.5)
pmse <- data.table(power = powers, mse = rep(0,length(powers)) )# Power mean squared error
for (p in powers){
pse <- rep(0,nrow(Pb)) # Power squared errors
for (i in 1:nrow(Pb)){
point.idw <- idw(formula = Pbbx~1,
data = Pb[-i,],
locations = ~X+Y,
newdata = Pb[i,],
idp = p,
nmax=20,
maxdist=30000,
debug.level = 0)
pse[i] <- (point.idw$var1.pred - Pb$Pbbx[i])^2
}
pmse[power==p,"mse"] = mean(pse)
}
plot(pmse)#Plot résultat
Pb.idw <- data.table(idw(formula = Pbbx~1,data=Pb,location=~X+Y,newdata=prov.grid,idp=1.5,nmax=20))## [inverse distance weighted interpolation]
setnames(Pb.idw,"var1.pred","predBx")
#Transfo inverse
alpha = BPb$lambda.hat
Pb.idw[,predi := exp(log(alpha * predBx + 1) / alpha)]
plot.predictions(Pb.idw,Pb,"Concentration en plomb - Prediction Distance inverse","Pb predictions")powers <- seq(0.5,5,0.5)
pmse <- data.table(power = powers, mse = rep(0,length(powers)) )# Power mean squared error
for (p in powers){
pse <- rep(0,nrow(Zn)) # Power squared errors
for (i in 1:nrow(Zn)){
point.idw <- idw(formula = Znbx~1,
data = Zn[-i,],
locations = ~X+Y,
newdata = Zn[i,],
idp = p,
nmax=20,
maxdist=30000,
debug.level = 0)
pse[i] <- (point.idw$var1.pred - Zn$Znbx[i])^2
}
pmse[power==p,"mse"] = mean(pse)
}
plot(pmse)Zn.idw <- data.table(idw(formula = Znbx~1,data=Zn,location=~X+Y,newdata=prov.grid,idp=1,nmax=20,maxdist=30000))## [inverse distance weighted interpolation]
setnames(Zn.idw,"var1.pred","predBx")
# Transformation inverse
alpha = BZn$lambda.hat
Zn.idw[,predi := exp(log(alpha * predBx + 1) / alpha)]
plot.predictions(Zn.idw,Zn,"Concentration en Zinc - Prediction Distance inverse","Zn predictions")powers <- seq(0.4,2,0.2)
pmse <- data.table(power = powers, mse = rep(0,length(powers)) )# Power mean squared error
for (p in powers){
pse <- rep(0,nrow(Cu)) # Power squared errors
for (i in 1:nrow(Cu)){
point.idw <- idw(formula = Cubx~1,
data = Cu[-i,],
locations = ~X+Y,
newdata = Cu[i,],
idp = p,
nmax=20,
maxdist=30000,
debug.level = 0)
pse[i] <- (point.idw$var1.pred - Cu$Cubx[i])^2
}
pmse[power==p,"mse"] = mean(pse)
}
plot(pmse)Cu.idw <- data.table(idw(formula = Cubx~1,data=Cu,location=~X+Y,newdata=prov.grid,idp=1.25,nmax=20,maxdist=30000))## [inverse distance weighted interpolation]
setnames(Cu.idw,"var1.pred","predBx")
# Transformation inverse
alpha = BCu$lambda.hat
Cu.idw[,predi := exp(log(alpha * predBx + 1) / alpha)]
plot.predictions(Cu.idw,Cu,"Concentration en Cuivre - Prediction Distance inverse","Cu predictions")doublons <- which(duplicated(Pb$X,Pb$Y))
Pb2<-Pb[-doublons,]
Pb.krig <- data.table(krige(formula = Pbbx_res~1, data = Pb2,locations = ~X+Y,
newdata = prov.grid, model = fit.Pb))## [using ordinary kriging]
setnames(Pb.krig, c("var1.pred", "var1.var"), c("pred", "Pb.varkrig"))
Pb.krig$pred <- Pb.krig$pred+predict(Pbbx.lm,Pb.krig)
alpha <- BPb$lambda.hat
Pb.krig[,predi := exp(log(alpha * pred + 1) / alpha)]
plot.predictions(Pb.krig,Pb,"Concentration en Plomb - Krigeage","Pb prediction")doublons <- which(duplicated(Zn$X,Zn$Y))
Zn2<-Zn[-doublons,]
Zn.krig <- data.table(krige(formula = Znbx_res~1, data = Zn2,locations = ~X+Y,
newdata = prov.grid, model = fit.Zn))## [using ordinary kriging]
setnames(Zn.krig, c("var1.pred", "var1.var"), c("pred", "Zn.varkrig"))
Zn.krig$pred <- Zn.krig$pred+predict(Znbx.lm,Zn.krig)
alpha <- BZn$lambda.hat
Zn.krig[,predi := exp(log(alpha * pred + 1) / alpha)]
plot.predictions(Zn.krig,Zn,"Concentration en Zinc - Krigeage","Zn predictions")doublons <- which(duplicated(Cu$X,Cu$Y))
Cu2<-Cu[-doublons,]
Cu.krig <- data.table(krige(formula = Cubx_res~1, data = Cu2,locations = ~X+Y,
newdata = prov.grid, model = fit.Cu,nmax=1))## [using ordinary kriging]
setnames(Cu.krig, c("var1.pred", "var1.var"), c("pred", "Cu.varkrig"))
Cu.krig$pred <- Cu.krig$pred+predict(Cubx.lm,Cu.krig)
alpha <- BCu$lambda.hat
Cu.krig[,predi := exp(log(alpha * pred + 1) / alpha)]
plot.predictions(Cu.krig,Cu,"Concentration en Cuivre - Krigeage","Cu predictions")g <- gstat(id="Pb", formula=Pbbx~1, data=Pb2, locations= ~X+Y)
g <- gstat(g,id='Zn',formula = Znbx~1, data = Zn2, locations = ~X+Y)
v.cross <- variogram(g, cutoff=40000, width=3000)
plot(v.cross)## data:
## Pb : formula = Pbbx`~`1 ; data dim = 1077 x 3
## Zn : formula = Znbx`~`1 ; data dim = 1530 x 3
## variograms:
## model psill range
## Pb[1] Nug 0.001364857 0.00
## Pb[2] Sph 0.001252742 20020.83
## Zn[1] Nug 0.002195824 0.00
## Zn[2] Sph 0.011670383 20020.83
## Pb.Zn[1] Nug 0.001573801 0.00
## Pb.Zn[2] Sph 0.003476007 20020.83
## ~X + Y
g <- copy(LMC)
g <- gstat(g, id='Zn', formula = Znbx~1,data = Zn2,locations = ~X+Y,model = LMC$model$Zn)
plot(v.cross,model=g$model,col='black',pch=16)## Linear Model of Coregionalization found. Good.
## [using ordinary cokriging]
setnames(Pb.cok, c('Pb.pred','Pb.var'),c('pred','Pb.varcok'))
#Pb.cok$pred <- Pb.cok$pred+predict(Pbbx.lm,Pb.cok)
alpha <- BPb$lambda.hat
Pb.cok[,predi := exp(log(alpha * pred + 1) / alpha)]
plot.predictions(Pb.cok,Pb,'Concentration en Plomb - Cokrigeage','Prediction')